### Replication file for 
### "Does Chinese FDI in Africa Inspire for a China Model of Development"

### Part 1: Connect the geocoded FDI data with Afrobarometer Round 6
rm(list=ls())

### Use "Alt+O" to collapse all sections and "Shift+Alt+O" to expand all

### Set your own working directory

# Load required packages --------------------------------------------------
library(sf)          # classes and functions for vector data
library(spData)        # load geographic data
library(lwgeom)        # calculating distance
library(tidyverse)

# Geocoded project-level FDI data -----------------------------------------

### read the original data into R
fdi<-read_csv("FDIProjGeocode.csv")

### FDI projects that are precisely coded, from mainland China, and new
fdi_precise<-fdi %>% 
  filter(Precision_final %in% c(1,2,9),
         Source_country=="China",
         Project_type!="Expansion",
         Project_type!="Co-location")

# Create or recode variables for FDI projects -----------------------------

### Year of announcement (if missing, use the year in the dataset)
fdi_precise<-fdi_precise %>% 
  mutate(Ancmtyear=as.numeric(Ancmtyear),
         Ancmtyear=ifelse(is.na(Ancmtyear),Year,Ancmtyear)
         ) 

### Year of operation (if missing, code it as 9999)
fdi_precise<-fdi_precise %>% 
  mutate(Opyear_final=as.numeric(Opyear_final),
         Opyear_final=ifelse(is.na(Opyear_final),9999,Opyear_final))

### Three sectors: manufacturing, resources, service
fdi_precise<-fdi_precise %>% 
  mutate(Sector_type=ifelse(Sector %in% c("Coal, oil & gas","Metals",
                                          "Minerals","Renewable energy"),
                            "Resource",
                            ifelse(Sector %in% c("Communications","Aerospace",
                                                 "Real estate","Healthcare",
                                                 "Financial services",
                                                 "Business services",
                                                 "Warehousing","Transportation",
                                                 "Software & IT services"),
                                   "Service","Manufacture"))
         )

### Ownership type
fdi_precise<-fdi_precise %>% 
  mutate(Ownership_final=trimws(Ownership_final),# trim the spaces
         Ownership_final=ifelse(Ownership_final!="POE",
                                "State-owned firms","Private firms"))

### Countries in the FDI data
country_fdi<-fdi_precise %>% pull(Destination_country) %>% unique()

### Convert the FDI data to "sf" type for geocomputation
fdi_precise_sf<-
  st_as_sf(fdi_precise,
           coords=c("xcoord","ycoord"),# x (longitude) and y (latitude)
           crs=4326 # specify the coordination reference system
  )

# Afrobarometer round 6 data ----------------------------------------------
Afb_R6<-read_csv("afb_full_r6.csv")

# Create or recode variables for Afrobarometer data -----------------------

### Year of interview
Afb_R6<-Afb_R6 %>% 
  mutate(yearintr=str_sub(dateintr,-2,-1) %>% 
           paste(20,.,sep="") %>% 
           as.numeric())

### Countries in Afrobarometer round 6
country_list<-strsplit(Afb_R6$geoname_adm_name,"|",fixed=T)
country_list<-unlist(lapply(country_list,"[",3))
Afb_R6<-Afb_R6 %>% 
  mutate(countryname=country_list,
         #disputed territory: 
         #Dakhla is currently occupied by Morocco
         countryname=ifelse(countryname=="Western Sahara","Morocco",
                            ifelse(countryname=="Ivory Coast",
                                   "Cote d'Ivoire",countryname))
                              )
country_r6<-Afb_R6 %>% pull(countryname) %>% unique()

# Collapse the afrobarometer data by survey cluster -----------------------

### Create survey cluster and collapse the data (to make things faster)
Cluster_R6<-Afb_R6 %>% 
  mutate(survey_cluster=paste("(",longitude,",",latitude,")",sep="")) %>% 
  subset(select=c(countryname,survey_cluster,longitude,latitude,yearintr)) %>% 
  unique()

### Convert the survey-cluster data to sf type
Cluster_R6_sf<-
  st_as_sf(Cluster_R6,
           coords=c("longitude","latitude"),# x (longitude) and y (latitude)
           crs=4326 # specify the coordination reference system
           )

# Plot Figure 1 (map): locations of Chinese FDI and surveyed areas --------

### Map of Africa
windows(width = 600,height = 500)
par(mar=c(1,1,1,1)) # set the graphic margin

world_africa<-world[world$continent=="Africa",]

plot(world_africa[0],lwd=2,reset=F)

### Plot the surveyed areas in Afrobarometer
plot(Cluster_R6_sf[0],add=T,pch=16,cex=0.5,col="grey80",alpha=0.5)

### Plot the FDI projects
plot(fdi_precise_sf[0],add=T,pch=2,cex=1.2,col="black",lwd=2)

### Add legend
legend(-25,-5,legend=c("Chinese FDI projects","Surveyed areas"),
       col=c("black","grey80"),
       pch=c(2,16))

### Export the Map (Figure 1)
# p_map<-recordPlot()
# tiff(width = 3000,height = 2000,"Map.tiff",res=300)
# p_map
# dev.off()

# Distance matrix ---------------------------------------------------------

### Countries in both FDI and Afb
country_r6_fdi<-intersect(country_r6,country_fdi)

### Distance buffers
dis_buffer<-c(25,50,75,100,125,150,175,200)

### An empty list to store the wide form distance matrix
distance_matrix_fdi<-list()

### An empty list to store the long form merged data
distance_long_fdi<-list()

### Distance of each FDI to each survey cluster: a loop over countries
for (i in 1:length(country_r6_fdi)){
  
  # FDI projects in country i
  fdi_country<-fdi_precise %>% 
    filter(Destination_country==country_r6_fdi[i]) %>% 
    subset(select=c(FDIProj,Sector_type,Ownership_final,
                    Opyear_final,Ancmtyear,Capital_city))
  
  # Pull the project id of country i
  proj_id<-fdi_country %>% pull(FDIProj)
  
  # Survey cluster in country i
  cluster<-Cluster_R6 %>% 
    filter(countryname==country_r6_fdi[i]) %>% 
    subset(select=c(survey_cluster,yearintr,countryname))

  # Calculate the distance (unit km), this returns a "unit" type object
  distance_matrix_fdi[[i]]<-
    st_distance(Cluster_R6_sf %>% 
                  filter(countryname==country_r6_fdi[i]),
                fdi_precise_sf %>% 
                  filter(Destination_country==country_r6_fdi[i]))/1000

  # Convert the "unit" type object to a data frame
  distance_matrix_fdi[[i]]<-distance_matrix_fdi[[i]] %>% as.data.frame()
  
  # Convert each column of the data frame from "unit" to "numeric"
  distance_matrix_fdi[[i]]<-apply(distance_matrix_fdi[[i]],2,as.numeric)
  
  # Change the column names to the id of projects
  colnames(distance_matrix_fdi[[i]])<-proj_id
  
  # Add the survey cluster area to the row
  distance_matrix_fdi[[i]]<-
    cbind.data.frame(cluster,distance_matrix_fdi[[i]],stringsAsFactors=F)
  
  # Convert the wide form to long form
  distance_long_fdi[[i]]<-distance_matrix_fdi[[i]] %>% 
    pivot_longer(.,!c("survey_cluster","yearintr","countryname"),
                 names_to = "FDIProj",values_to = "distance")
  
  # Merge "distance_long_fdi" with "fdi_country"
  distance_long_fdi[[i]]<-merge(distance_long_fdi[[i]],
                                fdi_country,by=c("FDIProj"),all.x=T)
  
  flush.console()
  print(i)
  
}

# Status based on distance, project stage, and interview time  ------------

### Identify active, eventual, and not close
cluster_project_fdi<-rep(list(rep(list(0),length(country_r6_fdi))),
                         length(dis_buffer))  
for (k in 1:length(dis_buffer)){
  
  for (i in 1:length(country_r6_fdi)){
    
    cluster_project_fdi[[k]][[i]]<-distance_long_fdi[[i]] %>% 
      mutate(D_status=ifelse(distance>dis_buffer[k],"Not close",
                             ifelse(yearintr>Opyear_final,"Active",
                                    "Eventual")))
    
  }
  # row bind each country by the same distance buffers
  cluster_project_fdi[[k]]<-bind_rows(cluster_project_fdi[[k]])
}  

### Collapse the data by each survey cluster (it takes some time)
data_cluster_FDI<-list() 

for (k in 1:length(dis_buffer)){
  
  data_cluster_FDI[[k]]<-cluster_project_fdi[[k]] %>%
    # Sort by survey_cluster and year of interview
    arrange(survey_cluster,yearintr) %>%
    # Group by survey_cluster and year of interview
    group_by(survey_cluster,yearintr) %>% 
    
    # Whether the cluster is close to a project
    mutate(
           # Number of projects the cluster close to
           Num_fdi_close=sum(D_status!="Not close"),
           # Number of active projects the cluster close to
           Num_fdi_active=sum(D_status=="Active"),
           # Number of eventual projects the cluster close to
           Num_fdi_eventual=sum(D_status=="Eventual"),
           
           # Number of active resource projects the cluster close to
           Num_fdi_resource_active=sum((Sector_type=="Resource" & 
                                          D_status=="Active")),
           # Number of eventual resource projects the cluster close to
           Num_fdi_resource_eventual=sum((Sector_type=="Resource" & 
                                            D_status=="Eventual")),
           
           # Number of active service projects the cluster close to
           Num_fdi_service_active=sum((Sector_type=="Service" & 
                                         D_status=="Active")),
           # Number of eventual service projects the cluster close to
           Num_fdi_service_eventual=sum((Sector_type=="Service" & 
                                           D_status=="Eventual")),
           
           # Number of active manufacturing projects the cluster close to
           Num_fdi_manuf_active=sum((Sector_type=="Manufacture" & 
                                       D_status=="Active")),
           # Number of eventual manufacturing projects the cluster close to
           Num_fdi_manuf_eventual=sum((Sector_type=="Manufacture" & 
                                         D_status=="Eventual")),
           
           # Number of active projects in capital cities the cluster close to
           Num_fdi_capital_active=sum((Capital_city==1 & D_status=="Active")),
           # number of eventual projects in capital cities the cluster close to
           Num_fdi_capital_eventual=sum((Capital_city==1 & 
                                           D_status=="Eventual")),
           
           # Distance to the closest active project
           Distance_active_min=min(distance[D_status=="Active"]),
           # Distance to the closest eventual project
           Distance_eventual_min=min(distance[D_status=="Eventual"]),
           # Distance of not close
           Distance_no_min=min(distance[D_status=="Not close"]),
           
           # Maximum duration of the active project
           Duration_active_max=max((yearintr-Opyear_final)[D_status=="Active"]),
           # Minimum duration of the active project
           Duration_active_min=min((yearintr-Opyear_final)[D_status=="Active"]),
           # Minimum time to be announced
           Time_eventual_min=min((Opyear_final-yearintr)[D_status=="Eventual"]),
           
           # Dummy: close to at least one project (active or eventual)
           D_close_fdi=ifelse(Num_fdi_close>0,1,0),
           # Dummy: close to at least one active project
           D_close_active_fdi=ifelse(Num_fdi_active>0,1,0),
           # Dummy: only close to eventual projects but not active ones
           D_close_eventual_fdi=ifelse((Num_fdi_eventual>0 & 
                                          Num_fdi_active==0),1,0)) %>% 
    
    # subset the dataset
    subset(select=c(survey_cluster,yearintr,
                    Num_fdi_close,Num_fdi_active,Num_fdi_eventual,
                    Num_fdi_resource_active,Num_fdi_resource_eventual,
                    Num_fdi_service_active, Num_fdi_service_eventual,
                    Num_fdi_manuf_active,Num_fdi_manuf_eventual,
                    Num_fdi_capital_active,Num_fdi_capital_eventual,
                    D_close_fdi,D_close_active_fdi,D_close_eventual_fdi,
                    Distance_active_min,Distance_eventual_min,Distance_no_min,
                    Duration_active_max,Duration_active_min,
                    Time_eventual_min)) %>% 
    # collapse the data by survey_cluster and year of interview
    unique() %>% 
    as.data.frame()
  
  flush.console()
  print(k)
  
}

# Merge FDI information with Afrobarometer data ---------------------------
Afb_R6<-Afb_R6 %>% 
  mutate(survey_cluster=paste("(",longitude,",",latitude,")",sep=""))
data_respno<-list()
for (j in 1:length(dis_buffer)){
  data_respno[[j]]<-merge(data_cluster_FDI[[j]],Afb_R6,
                          by=c("survey_cluster","yearintr"),all.x=T)
}

# Recode dependent variables ----------------------------------------------
for (j in 1:length(data_respno)){
  
  data_respno[[j]]<-data_respno[[j]] %>% 
    
    mutate(
           # Best development model: choose one country
           BestModel_China=ifelse(q80a==-1,NA,ifelse(q80a==2,1,0)),
           BestModel_US=ifelse(q80a==-1,NA,ifelse(q80a==1,1,0)),
           BestModel_Colonial=ifelse(q80a==-1,NA,ifelse(q80a==3,1,0)),
           
           # Is China's economic and political influence positive
           Positive_O=ifelse(q81b %in% c(1,2,3,4,5),q81b,NA),
           Positive_D=ifelse(q81b==-1,NA,ifelse(q81b %in% c(4,5),1,0)),
           
           # factors that contribute most to China's positive image
           Business_Positive=ifelse(q81c==-1,NA,ifelse(q81c==4,1,0)),
           Infrastructure_Positive=ifelse(q81c==-1,NA,ifelse(q81c==3,1,0)),
           ProductCost_Positive=ifelse(q81c==-1,NA,ifelse(q81c==5,1,0)),
           People_Positive=ifelse(q81c==-1,NA,ifelse(q81c==6,1,0)),
           
           # factors that contribute most to China's negative image
           Extract_Negative=ifelse(q81d==-1,NA,ifelse(q81d==1,1,0)),
           Land_Negative=ifelse(q81d==-1,NA,ifelse(q81d==2,1,0)),
           JobBusiness_Negative=ifelse(q81d==-1,NA,ifelse(q81d==4,1,0)),
           ProductQuality_Negative=ifelse(q81d==-1,NA,ifelse(q81d==5,1,0)),
           People_Negative=ifelse(q81d==-1,NA,ifelse(q81d==6,1,0))
    )
}

# Recode control variables ------------------------------------------------
edu_levels<-c("No formal school","Informal school only","Some primary school",
              "Primary school","Some secondary school","Secondary school",
              "Post-secondary qualifications","Some university",
              "University","Post-graduate")

for (j in 1:length(data_respno)){
  
  data_respno[[j]]<-data_respno[[j]] %>% 
    
    mutate(
           # Living in urban or rural areas
           Urban_level=ifelse(urbrur==1,"Urban",ifelse(urbrur==2,"Rural",
                                                 "Semi-urban")),
           Urban=ifelse(Urban_level=="Urban",2,
                        ifelse(Urban_level=="Rural",0,1)),
           
           # Age
           Age=ifelse(q1==-1,NA,ifelse(q1<=105,q1,NA)),
           Age_cut=cut_number(Age,n=4),
           
           # Gender
           Gender=thisint-1,
           Gender_level=ifelse(thisint==1,"Male","Female"),
           
           # Education
           Education=ifelse(q97 %in% c(0,1,2,3,4,5,6,7,8,9),q97,NA),
           Education_level=as.factor(Education)
    )
  levels(data_respno[[j]]$Education_level)<-edu_levels
}


# Subset the dataset with only variables that will be used ----------------
for (j in 1:length(data_respno)){
  
  data_respno[[j]]<-data_respno[[j]] %>% 
    subset(select=c(survey_cluster:uniqueea,countryname:Education_level))
  
}
names(data_respno)<-c("25km","50km","75km","100km","125km","150km",
                      "175km","200km")

# Save the dataset for analysis in the next step --------------------------
save(data_respno,file="data_respno.RData")